1. Converting To Strings


In [34]:
# convert another data type into a string
s = str(42)
s


Out[34]:
'42'

In [35]:
# convert to a string with 2 decimal places
'{:.2f}'.format(3.14159)


Out[35]:
'3.14'

2. Get Parts Of A String


In [36]:
s='I like you'
s[0]


Out[36]:
'I'

In [37]:
len(s)


Out[37]:
10

String slicing is like list slicing:


In [38]:
s[:6]


Out[38]:
'I like'

In [39]:
s[7:]


Out[39]:
'you'

In [40]:
s[-1]


Out[40]:
'u'

In [41]:
s[2:-1:2]


Out[41]:
'lk o'

Using the find function


In [42]:
# returns the lowest index where the substring starts (-1 if not found)
s.find('like')


Out[42]:
2

In [43]:
# returns -1 if not found
s.find('hate')


Out[43]:
-1

Check start of string


In [44]:
s.startswith('I') # also endswith


Out[44]:
True

In [45]:
# pass a tuple to startswith/endswith to check for any of multiple strings

places = ['france', 'http://france.com', 'USA', 'https://usa.com']

[n for n in places if n.startswith(('http://', 'https://'))]


Out[45]:
['http://france.com', 'https://usa.com']

fnmatch matches strings using shell-style wildcards


In [46]:
# use this when you just want to add a simple wildcard

from fnmatch import fnmatch, fnmatchcase
# fnmatch follows the case sensitivity of the OS; fnmatchcase is always case-sensitive

names = ['Dat1.csv', 'Dat2.csv', 'config.ini', 'foo.py']
[name for name in names if fnmatch(name, 'Dat*.csv')]


Out[46]:
['Dat1.csv', 'Dat2.csv']

3. isdigit


In [47]:
# checks if every character in the string is a digit
s.isdigit()


Out[47]:
False

4. Common Modifications

These functions return a new string. The original is unmodified.


In [48]:
s.lower() # also upper()


Out[48]:
'i like you'

In [49]:
# replaces all instances of 'like' with 'love'
s.replace('like', 'love')


Out[49]:
'I love you'

In [50]:
# remove leading and trailing whitespace
s5 = '    ham and cheese '
s5.strip()


Out[50]:
'ham and cheese'

In [51]:
s5.lstrip()


Out[51]:
'ham and cheese '

In [52]:
s5.rstrip()


Out[52]:
'    ham and cheese'

5. Splitting and Joining

Split a string:


In [53]:
# split a string into a list of substrings separated by a delimiter
s.split(' ')


Out[53]:
['I', 'like', 'you']

In [54]:
# with no argument, split on runs of any whitespace (empty strings are dropped)
s.split()


Out[54]:
['I', 'like', 'you']

Using regular expressions to split with multiple delimiters and whitespace handling. See Regular Expressions for more on regular expressions.


In [55]:
# splitting with a regex allows using multiple delimiters and whitespace handling
import re
line = 'asdf fjdk; afed, fjek,asdf, foo'
re.split(r'[;,\s]\s*', line)


Out[55]:
['asdf', 'fjdk', 'afed', 'fjek', 'asdf', 'foo']

In [56]:
# don't use capturing groups (parenthesis) or the delimiters are captured as fields in the split string
re.split(r'(;|,|\s)\s*', line)


Out[56]:
['asdf', ' ', 'fjdk', ';', 'afed', ',', 'fjek', ',', 'asdf', ',', 'foo']

In [57]:
# instead use non-capturing groups / parenthesis
re.split(r'(?:,|;|\s)\s*', line)  # note the '(?:' starting the group, which denotes a non-capturing group


Out[57]:
['asdf', 'fjdk', 'afed', 'fjek', 'asdf', 'foo']

Join / concatenate strings


In [102]:
# join a list of strings into one string using a delimiter
stooges = ['larry', 'curly', 'moe']
' '.join(stooges)


Out[102]:
'larry curly moe'

In [103]:
# convert to string and format in one step
data = ['ACME', 50, 91.1]
','.join(str(d) for d in data)


Out[103]:
'ACME,50,91.1'

In [80]:
#string concatenation
s + ' frank'


Out[80]:
'I like you frank'

In [83]:
# using a translation table and translate() to map chars in a string
# (str.maketrans converts the single-character keys to their ordinals)
remap = str.maketrans({
    '\t': ' ',
    '\f': ' ',
    '\r': None,  # a None value deletes the char from the translated string
    '\n': None,
})
s = 'python\fis\tawesome\r\n'

s.translate(remap)


Out[83]:
'python is awesome'

6. String Formatting


In [60]:
# old way
'raining %s and %s' % ('cats', 'dogs')


Out[60]:
'raining cats and dogs'

In [61]:
# new way
'raining {} and {}'.format('cats', 'dogs')


Out[61]:
'raining cats and dogs'

In [62]:
#new way (using named arguments)
'raining {animal1} and {animal2}'.format(animal1='cats', animal2='dogs')


Out[62]:
'raining cats and dogs'

Formatting parameters (more examples):


In [1]:
# use 2 decimal places
'pi is {:.2f}'.format(3.14159)


Out[1]:
'pi is 3.14'

In [3]:
# adding a thousands separator to the format
'average hieght is {:,.2f} mm'.format(45678789.7653)


Out[3]:
'average hieght is 45,678,789.77 mm'

text justification


In [85]:
# useful when outputting fixed-width text records
text = 'Hello World'
text.ljust(20)


Out[85]:
'Hello World         '

In [86]:
text.rjust(20)


Out[86]:
'         Hello World'

In [87]:
text.center(20)


Out[87]:
'    Hello World     '

In [88]:
# fill with a specific char
text.center(20,'*')


Out[88]:
'****Hello World*****'

using format for text justification


In [89]:
format(text, '>20')


Out[89]:
'         Hello World'

In [90]:
format(text, '<20')


Out[90]:
'Hello World         '

In [91]:
format(text, '^20')


Out[91]:
'    Hello World     '

In [97]:
# fill with a specific character
format(text, '*^20s')


Out[97]:
'****Hello World*****'

In [98]:
#format works with non strings
x = 1.2345
format(x, '*^10')


Out[98]:
'**1.2345**'

In [104]:
# format is useful when formatting multiple values or non-strings
'{:>10s}{:>10s}{:*>10f}'.format('Hello', 'World', 1.23)


Out[104]:
'     Hello     World**1.230000'

using variable names in format strings


In [105]:
# using named parameters
s = '{name} has {n} messages.'
s.format(name='Guido', n=37)


Out[105]:
'Guido has 37 messages.'

In [106]:
# using local variables
# vars() with no arguments returns the current namespace as a dict,
# so {name} and {n} are looked up among the variables defined here
name = 'Guido'
n = 37
s.format_map(vars())


Out[106]:
'Guido has 37 messages.'

In [108]:
#using variables in a class instance
class Info:
    """Plain attribute holder; its instance dict can be fed to str.format_map()."""

    def __init__(self, name, n):
        # store both constructor arguments as same-named instance attributes
        self.name, self.n = name, n

info = Info('Guido',37)
s.format_map(vars(info))


Out[108]:
'Guido has 37 messages.'

7. Raw Strings


In [64]:
# normal strings allow for escaped characters
print('first line\nsecond line')


first line
second line

In [65]:
# raw strings treat backslashes as literal characters
print(r'first line\nsecond line')


first line\nsecond line

8. Regular Expressions


In [66]:
import re

text1 = '11/27/2012'
text2 = 'Nov 27, 2012'

# write regex patterns as raw strings so the backslashes are passed through as-is
m = re.match(r'\d+/\d+/\d+', text1)
# m.group() is the entire matched string; m is None when nothing matched
print(m.group() if m else 'no match')


11/27/2012

If you are going to reuse the expression a bunch, precompile it


In [67]:
datepat = re.compile(r'\d+/\d+/\d+')
# match() gives None (falsy) when the pattern does not match at the start of the string
print('yes' if datepat.match(text1) else 'no')


yes

Capture groups


In [68]:
datepat = re.compile(r'(\d+)/(\d+)/(\d+)') # parentheses define capture groups.
m = datepat.match(text1)
m.group()


Out[68]:
'11/27/2012'

In [69]:
m.group(0)


Out[69]:
'11/27/2012'

In [70]:
m.group(1)


Out[70]:
'11'

In [71]:
m.group(2)


Out[71]:
'27'

In [72]:
m.group(3)


Out[72]:
'2012'

In [73]:
m.groups()  # returns a tuple


Out[73]:
('11', '27', '2012')

finding all matches


In [74]:
text = 'Today is 11/27/2012. PyCon starts 3/13/2013.'
datepat.findall(text)  # returns a list of tuples


Out[74]:
[('11', '27', '2012'), ('3', '13', '2013')]

In [75]:
# finditer is like findall but lazily yields match objects instead of building a list
for m in datepat.finditer(text):
    print(m.groups())


('11', '27', '2012')
('3', '13', '2013')

replacing strings with reg exps


In [76]:
# the backslashes in the replacement string reference capture groups
re.sub(r'(\d+)/(\d+)/(\d+)', r'\3-\1-\2', text)


Out[76]:
'Today is 2012-11-27. PyCon starts 2013-3-13.'

In [77]:
#using a callback function for the substitution string
def change_date(m):
    """Return the match's first three groups joined by '|', with groups 1 and 2 swapped."""
    first, second, third = m.group(1, 2, 3)  # multi-arg group() returns a tuple
    return '{}|{}|{}'.format(second, first, third)

datepat.sub(change_date, text)


Out[77]:
'Today is 27|11|2012. PyCon starts 13|3|2013.'

Case insensitive search with reg exps


In [78]:
text = 'UPPER PYTHON, lower python, Mixed Python'
re.findall('python', text, flags=re.IGNORECASE)


Out[78]:
['PYTHON', 'python', 'Python']

In [110]:
# note that case is not carried through in a case insensitive replace.
re.sub('python', 'snake', text, flags=re.IGNORECASE)


Out[110]:
'UPPER snake, lower snake, Mixed snake'

9. Handling XML Entities

Converting to entities


In [111]:
s = 'Elements are written as "<tag>text</tag>".'

import html
html.escape(s)


Out[111]:
'Elements are written as &quot;&lt;tag&gt;text&lt;/tag&gt;&quot;.'

In [113]:
# disable escaping of quotes

html.escape(s, quote=False)


Out[113]:
'Elements are written as "&lt;tag&gt;text&lt;/tag&gt;".'

Converting from entities


In [115]:
# xml entities
t = 'The prompt is &gt;&gt;&gt;'

from xml.sax.saxutils import unescape
unescape(t)


Out[115]:
'The prompt is >>>'

In [118]:
# html
s = 'Spicy &quot;Jalape&#241;o&quot.'
import html
# html.unescape is the public API for decoding character references.
# `import html` alone does not load the html.parser submodule, so reaching
# for html.parser.unescape only works if something else imported it first.
html.unescape(s)


Out[118]:
'Spicy "Jalapeño".'